/**
 *
 */
package com.feizhi.common.extractor;

import java.io.InputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

/**
 * {@link TextExtractor} implementation that pulls the plain text out of a PDF
 * document using PDFBox.
 *
 * @author shuaqiu
 */
public class PdfTextExtractor extends TextExtractor {

    private static final Log LOG = LogFactory.getLog(PdfTextExtractor.class);

    /**
     * Extracts the text content of the PDF read from the given stream.
     * <p>
     * Any failure while loading the document, stripping its text or closing it
     * is logged at error level and reported to the caller as an empty string;
     * no exception is propagated. This method does not itself close
     * {@code stream}.
     *
     * @param stream
     *            the stream to read the PDF document from
     * @return the extracted text, or an empty string if extraction failed
     */
    @Override
    public String extract(final InputStream stream) {
        try {
            final PDDocument document = PDDocument.load(stream);
            try {
                final PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(document);
            } finally {
                // Release the document even when text stripping throws;
                // closing only on the success path would leak it on failure.
                document.close();
            }
        } catch (final Exception e) {
            LOG.error(e.getMessage(), e);
        }

        return "";
    }
}
